from scrapy.http import Request
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
from tutorial.items import TutorialItem
class DmozSpider(CrawlSpider):
    """Crawl 24mn.me and emit image ``alt``/``src`` values from matching pages.

    The crawl is seeded with the site root URL. Links whose URL matches the
    ``hd2/tuimo`` pattern are followed, and each such page is handed to
    :meth:`parse_item`.
    """

    name = "dmoz"
    allowed_domains = ["24mn.me"]

    # Scrapy's UserAgentMiddleware reads the spider's ``user_agent`` attribute
    # and applies it to every request that has no User-Agent header of its
    # own. Previously the browser UA was attached only to the seed request,
    # so requests generated by the rules below used Scrapy's default UA.
    user_agent = (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) '
        'AppleWebKit/605.1.15 (KHTML, like Gecko) '
        'Version/11.1.1 Safari/605.1.15'
    )

    rules = (
        # allow:    regular expression (raw string, no escape processing) that
        #           an extracted link's URL must match.
        # callback: name of the spider method called with each matching
        #           page's response.
        # follow:   keep extracting and following links from matched pages.
        Rule(LinkExtractor(allow=r'hd2/tuimo'), callback='parse_item', follow=True),
    )

    def start_requests(self):
        """Yield the single seed request for the site root.

        The User-Agent header is still set explicitly here, so the seed
        request carries it even if the user-agent middleware is disabled.
        """
        headers = {"User-Agent": self.user_agent}
        yield Request("https://www.24mn.me", headers=headers)

    def parse_item(self, response):
        """Build one item per page from the images nested in ``<li><a>``.

        Args:
            response: the downloaded page for a URL matched by ``rules``.

        Yields:
            A ``TutorialItem`` whose ``title`` is the list of every matching
            image's ``alt`` text and whose ``link`` is the list of every
            matching image's ``src`` value, both in document order.
        """
        images = "//li/a/img"
        item = TutorialItem()
        item["title"] = response.xpath(images + "/@alt").extract()
        item["link"] = response.xpath(images + "/@src").extract()
        # NOTE: "desc" is not populated; the earlier draft XPath for it was
        # malformed (unquoted attribute value and a trailing slash).
        yield item
